{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from pandas import DataFrame,Series\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df1=DataFrame({'key':['b','b','a','c','a','s','b'],\n",
    "             'data1':range(7)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df2=DataFrame({'key':['a','b','d'],'data2':range(3)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>s</th>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     data1  data2\n",
       "key              \n",
       "a      2.0    NaN\n",
       "a      4.0    NaN\n",
       "a      NaN    0.0\n",
       "b      0.0    NaN\n",
       "b      1.0    NaN\n",
       "b      6.0    NaN\n",
       "b      NaN    1.0\n",
       "c      3.0    NaN\n",
       "d      NaN    2.0\n",
       "s      5.0    NaN"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3=pd.concat([df1,df2],axis=0,join='outer',ignore_index=True).set_index(['key'],drop=True).sort_index()\n",
    "df3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pd.concat?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>key</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>b</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>b</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6</td>\n",
       "      <td>b</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   data1 key  data2\n",
       "0      0   b      1\n",
       "1      1   b      1\n",
       "2      6   b      1\n",
       "3      2   a      0\n",
       "4      4   a      0"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df4=pd.merge(df1,df2,on=['key'])\n",
    "df4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>key</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>b</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>b</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>a</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>c</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>a</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>s</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>b</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   data1 key  data2\n",
       "0      0   b    1.0\n",
       "1      1   b    1.0\n",
       "2      2   a    0.0\n",
       "3      3   c    NaN\n",
       "4      4   a    0.0\n",
       "5      5   s    NaN\n",
       "6      6   b    1.0"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df4=pd.merge(df1,df2,on=['key'],how='left')\n",
    "df4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>key</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>b</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>b</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6.0</td>\n",
       "      <td>b</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2.0</td>\n",
       "      <td>a</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4.0</td>\n",
       "      <td>a</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>3.0</td>\n",
       "      <td>c</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>5.0</td>\n",
       "      <td>s</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>NaN</td>\n",
       "      <td>d</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   data1 key  data2\n",
       "0    0.0   b    1.0\n",
       "1    1.0   b    1.0\n",
       "2    6.0   b    1.0\n",
       "3    2.0   a    0.0\n",
       "4    4.0   a    0.0\n",
       "5    3.0   c    NaN\n",
       "6    5.0   s    NaN\n",
       "7    NaN   d    2.0"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df4=pd.merge(df1,df2,on=['key'],how='outer')\n",
    "df4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>key</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>b</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>b</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>a</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>c</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>a</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>s</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>b</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   data1 key  data2\n",
       "0      0   b    1.0\n",
       "1      1   b    1.0\n",
       "2      2   a    0.0\n",
       "3      3   c    NaN\n",
       "4      4   a    0.0\n",
       "5      5   s    NaN\n",
       "6      6   b    1.0"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df4=pd.merge(df1,df2,on=['key'],how='left')\n",
    "df4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "arr= np.arange(12).reshape((3,4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0,  1,  2,  3,  0,  1,  2,  3],\n",
       "       [ 4,  5,  6,  7,  4,  5,  6,  7],\n",
       "       [ 8,  9, 10, 11,  8,  9, 10, 11]])"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.concatenate([arr,arr],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "s1=Series([0,1],index=['a','b'])\n",
    "s2=Series([2,3,4],index=['c','d','e'])\n",
    "s3=Series([5,6],index=['f','g'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0\n",
       "b    1\n",
       "f    5\n",
       "g    6\n",
       "c    2\n",
       "d    3\n",
       "e    4\n",
       "dtype: int64"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([s1,s3,s2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data=Series([])\n",
    "data = pd.concat([data,s1,s2,s3],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>f</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>g</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    0    1    2    3\n",
       "a NaN  0.0  NaN  NaN\n",
       "b NaN  1.0  NaN  NaN\n",
       "c NaN  NaN  2.0  NaN\n",
       "d NaN  NaN  3.0  NaN\n",
       "e NaN  NaN  4.0  NaN\n",
       "f NaN  NaN  NaN  5.0\n",
       "g NaN  NaN  NaN  6.0"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "s4=pd.concat([s1*5,s3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   0  1\n",
       "a  0  0\n",
       "b  1  5"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([s1,s4],axis=1,join='inner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     0    1\n",
       "a  0.0  0.0\n",
       "c  NaN  NaN\n",
       "b  1.0  5.0\n",
       "e  NaN  NaN"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([s1,s4],axis=1,join_axes=[['a','c','b','e']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df1= DataFrame(np.arange(6).reshape((3,2)),index=['a','b','c'],columns=['one','two'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df2= DataFrame(np.arange(4).reshape((2,2)),index=['a','c'],columns=['one','two'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>6.0</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   one  two\n",
       "a  0.0  2.0\n",
       "b  NaN  NaN\n",
       "c  6.0  8.0"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1+df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   one  two\n",
       "a    0    1\n",
       "b    2    3\n",
       "c    4    5\n",
       "a    0    1\n",
       "c    2    3"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([df1,df2],axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   one  two  one  two\n",
       "a    0    1  0.0  1.0\n",
       "b    2    3  NaN  NaN\n",
       "c    4    5  2.0  3.0"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([df1,df2],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">level1</th>\n",
       "      <th colspan=\"2\" halign=\"left\">level2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  level1     level2     \n",
       "     one two    one  two\n",
       "a      0   1    0.0  1.0\n",
       "b      2   3    NaN  NaN\n",
       "c      4   5    2.0  3.0"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([df1,df2],axis=1,keys=['level1','level2'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pd.concat?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 合并重叠数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "a= Series([np.nan,2.5,np.nan,3.5,4.5,np.nan],index=['f','e','d','c','b','a'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "b=Series(np.arange(len(a)),dtype=np.float64,index=['f','e','d','c','b','a'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "b[-1]=np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "f    NaN\n",
       "e    2.5\n",
       "d    NaN\n",
       "c    3.5\n",
       "b    4.5\n",
       "a    NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "f    0.0\n",
       "e    1.0\n",
       "d    2.0\n",
       "c    3.0\n",
       "b    4.0\n",
       "a    NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    NaN\n",
       "b    4.5\n",
       "c    3.0\n",
       "d    2.0\n",
       "e    1.0\n",
       "f    0.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b[:-2].combine_first(a[2:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0. ,  2.5,  2. ,  3.5,  4.5,  nan])"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.where(pd.isnull(a),b,a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df1=DataFrame({'a':[1,np.nan,5.,np.nan],'b':[np.nan,2.,np.nan,6.],'c':range(2,18,4)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df2=DataFrame({'a':[5.,4.,np.nan,3.,7.],\n",
    "              'b':[np.nan,3.,4.,6.,8.]})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     a    b   c\n",
       "0  1.0  NaN   2\n",
       "1  NaN  2.0   6\n",
       "2  5.0  NaN  10\n",
       "3  NaN  6.0  14"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>7.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     a    b     c\n",
       "0  1.0  NaN   2.0\n",
       "1  4.0  2.0   6.0\n",
       "2  5.0  4.0  10.0\n",
       "3  3.0  6.0  14.0\n",
       "4  7.0  8.0   NaN"
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.combine_first(df2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 重塑 和 轴转向"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data=DataFrame(np.arange(6).reshape((2,3)),index=pd.Index(['Ohio','Colorado'],name='state'),columns=pd.Index(['one','two','three'],name='number'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>number</th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>state</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "number    one  two  three\n",
       "state                    \n",
       "Ohio        0    1      2\n",
       "Colorado    3    4      5"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "result=data.stack()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "state     number\n",
       "Ohio      one       0\n",
       "          two       1\n",
       "          three     2\n",
       "Colorado  one       3\n",
       "          two       4\n",
       "          three     5\n",
       "dtype: int64"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>number</th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>state</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "number    one  two  three\n",
       "state                    \n",
       "Ohio        0    1      2\n",
       "Colorado    3    4      5"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result.unstack()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>state</th>\n",
       "      <th>Ohio</th>\n",
       "      <th>Colorado</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>number</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>one</th>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>three</th>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "state   Ohio  Colorado\n",
       "number                \n",
       "one        0         3\n",
       "two        1         4\n",
       "three      2         5"
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result.unstack(0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>state</th>\n",
       "      <th>Ohio</th>\n",
       "      <th>Colorado</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>number</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>one</th>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>three</th>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "state   Ohio  Colorado\n",
       "number                \n",
       "one        0         3\n",
       "two        1         4\n",
       "three      2         5"
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result.unstack('state')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>number</th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>state</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "number    one  two  three\n",
       "state                    \n",
       "Ohio        0    1      2\n",
       "Colorado    3    4      5"
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result.unstack('number')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "one  a    0\n",
       "     b    1\n",
       "     c    2\n",
       "     d    3\n",
       "two  c    4\n",
       "     d    5\n",
       "     e    6\n",
       "dtype: int64"
      ]
     },
     "execution_count": 109,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s1=Series([0,1,2,3],index=['a','b','c','d'])\n",
    "s2=Series([4,5,6],index=['c','d','e'])\n",
    "data2=pd.concat([s1,s2],keys=['one','two'])\n",
    "data2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "one  a    0.0\n",
       "     b    1.0\n",
       "     c    2.0\n",
       "     d    3.0\n",
       "two  c    4.0\n",
       "     d    5.0\n",
       "     e    6.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 110,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data2.unstack().stack()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "one  a    0.0\n",
       "     b    1.0\n",
       "     c    2.0\n",
       "     d    3.0\n",
       "     e    NaN\n",
       "two  a    NaN\n",
       "     b    NaN\n",
       "     c    4.0\n",
       "     d    5.0\n",
       "     e    6.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data2.unstack().stack(dropna=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>side</th>\n",
       "      <th>left</th>\n",
       "      <th>right</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>state</th>\n",
       "      <th>number</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">Ohio</th>\n",
       "      <th>one</th>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>three</th>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">Colorado</th>\n",
       "      <th>one</th>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>three</th>\n",
       "      <td>5</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "side             left  right\n",
       "state    number             \n",
       "Ohio     one        0      5\n",
       "         two        1      6\n",
       "         three      2      7\n",
       "Colorado one        3      8\n",
       "         two        4      9\n",
       "         three      5     10"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df=DataFrame({'left':result,'right':result+5},columns=pd.Index(['left','right'],name='side'));df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>side</th>\n",
       "      <th colspan=\"2\" halign=\"left\">left</th>\n",
       "      <th colspan=\"2\" halign=\"left\">right</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>state</th>\n",
       "      <th>Ohio</th>\n",
       "      <th>Colorado</th>\n",
       "      <th>Ohio</th>\n",
       "      <th>Colorado</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>number</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>one</th>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>three</th>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>7</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "side   left          right         \n",
       "state  Ohio Colorado  Ohio Colorado\n",
       "number                             \n",
       "one       0        3     5        8\n",
       "two       1        4     6        9\n",
       "three     2        5     7       10"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.unstack('state')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "state     number  side \n",
       "Ohio      one     left      0\n",
       "                  right     5\n",
       "          two     left      1\n",
       "                  right     6\n",
       "          three   left      2\n",
       "                  right     7\n",
       "Colorado  one     left      3\n",
       "                  right     8\n",
       "          two     left      4\n",
       "                  right     9\n",
       "          three   left      5\n",
       "                  right    10\n",
       "dtype: int64"
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.stack()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>side</th>\n",
       "      <th colspan=\"3\" halign=\"left\">left</th>\n",
       "      <th colspan=\"3\" halign=\"left\">right</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>number</th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>state</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "side     left           right          \n",
       "number    one two three   one two three\n",
       "state                                  \n",
       "Ohio        0   1     2     5   6     7\n",
       "Colorado    3   4     5     8   9    10"
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.unstack()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "state     number  side \n",
       "Ohio      one     left      0\n",
       "                  right     5\n",
       "          two     left      1\n",
       "                  right     6\n",
       "          three   left      2\n",
       "                  right     7\n",
       "Colorado  one     left      3\n",
       "                  right     8\n",
       "          two     left      4\n",
       "                  right     9\n",
       "          three   left      5\n",
       "                  right    10\n",
       "dtype: int64"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sta=df.stack();sta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state</th>\n",
       "      <th>Ohio</th>\n",
       "      <th>Colorado</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>number</th>\n",
       "      <th>side</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">one</th>\n",
       "      <th>left</th>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>right</th>\n",
       "      <td>5</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">two</th>\n",
       "      <th>left</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>right</th>\n",
       "      <td>6</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">three</th>\n",
       "      <th>left</th>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>right</th>\n",
       "      <td>7</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "state         Ohio  Colorado\n",
       "number side                 \n",
       "one    left      0         3\n",
       "       right     5         8\n",
       "two    left      1         4\n",
       "       right     6         9\n",
       "three  left      2         5\n",
       "       right     7        10"
      ]
     },
     "execution_count": 117,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sta.unstack('state')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>food</th>\n",
       "      <th>ounces</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>bacon</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>pulled pork</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>bacon</td>\n",
       "      <td>12.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Pastrami</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>corned beef</td>\n",
       "      <td>7.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Bacon</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>pastrami</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>honey ham</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>nova lox</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          food  ounces\n",
       "0        bacon     4.0\n",
       "1  pulled pork     3.0\n",
       "2        bacon    12.0\n",
       "3     Pastrami     6.0\n",
       "4  corned beef     7.5\n",
       "5        Bacon     8.0\n",
       "6     pastrami     3.0\n",
       "7    honey ham     5.0\n",
       "8     nova lox     6.0"
      ]
     },
     "execution_count": 122,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data=DataFrame({'food':['bacon','pulled pork','bacon','Pastrami','corned beef','Bacon','pastrami','honey ham','nova lox']\n",
    "               ,'ounces':[4,3,12,6,7.5,8,3,5,6]});data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 映射数据转换和数据替换和重命名轴"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "meat_to_animal = {\n",
    "    'bacon': 'pig',\n",
    "    'pulled pork':'piog',\n",
    "    'pastrami':'cow',\n",
    "    'corned beef':'cwo',\n",
    "    'honey ham':'pig',\n",
    "    'nova lox':'salmon'\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>food</th>\n",
       "      <th>ounces</th>\n",
       "      <th>animal</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>bacon</td>\n",
       "      <td>4.0</td>\n",
       "      <td>pig</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>pulled pork</td>\n",
       "      <td>3.0</td>\n",
       "      <td>piog</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>bacon</td>\n",
       "      <td>12.0</td>\n",
       "      <td>pig</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Pastrami</td>\n",
       "      <td>6.0</td>\n",
       "      <td>cow</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>corned beef</td>\n",
       "      <td>7.5</td>\n",
       "      <td>cwo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Bacon</td>\n",
       "      <td>8.0</td>\n",
       "      <td>pig</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>pastrami</td>\n",
       "      <td>3.0</td>\n",
       "      <td>cow</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>honey ham</td>\n",
       "      <td>5.0</td>\n",
       "      <td>pig</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>nova lox</td>\n",
       "      <td>6.0</td>\n",
       "      <td>salmon</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          food  ounces  animal\n",
       "0        bacon     4.0     pig\n",
       "1  pulled pork     3.0    piog\n",
       "2        bacon    12.0     pig\n",
       "3     Pastrami     6.0     cow\n",
       "4  corned beef     7.5     cwo\n",
       "5        Bacon     8.0     pig\n",
       "6     pastrami     3.0     cow\n",
       "7    honey ham     5.0     pig\n",
       "8     nova lox     6.0  salmon"
      ]
     },
     "execution_count": 129,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['animal']=data['food'].map(str.lower).map(meat_to_animal);data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       pig\n",
       "1      piog\n",
       "2       pig\n",
       "3       cow\n",
       "4       cwo\n",
       "5       pig\n",
       "6       cow\n",
       "7       pig\n",
       "8    salmon\n",
       "Name: food, dtype: object"
      ]
     },
     "execution_count": 130,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['food'].map(lambda x: meat_to_animal[x.lower()])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       1.0\n",
       "1    -999.0\n",
       "2       2.0\n",
       "3    -999.0\n",
       "4   -1000.0\n",
       "5       3.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = Series([1.,-999,2.,-999,-1000,3.]);data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       1.0\n",
       "1       NaN\n",
       "2       2.0\n",
       "3       NaN\n",
       "4   -1000.0\n",
       "5       3.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 133,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.replace(-999,np.nan)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1.0\n",
       "1    NaN\n",
       "2    2.0\n",
       "3    NaN\n",
       "4    NaN\n",
       "5    3.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 134,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.replace([-999,-1000],np.nan)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data =DataFrame(np.arange(12).reshape((3,4)),\n",
    "               index=['Ohio','Colorado','New York'],\n",
    "               columns=['one','two','three','four'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['OHIO', 'COLORADO', 'NEW YORK'], dtype=object)"
      ]
     },
     "execution_count": 136,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.index.map(str.upper)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>OHIO</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>COLORADO</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NEW YORK</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          one  two  three  four\n",
       "OHIO        0    1      2     3\n",
       "COLORADO    4    5      6     7\n",
       "NEW YORK    8    9     10    11"
      ]
     },
     "execution_count": 138,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.index=data.index.map(str.upper);data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ONE</th>\n",
       "      <th>TWO</th>\n",
       "      <th>THREE</th>\n",
       "      <th>FOUR</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New York</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          ONE  TWO  THREE  FOUR\n",
       "Ohio        0    1      2     3\n",
       "Colorado    4    5      6     7\n",
       "New York    8    9     10    11"
      ]
     },
     "execution_count": 139,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.rename(index=str.title,columns=str.upper)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>peekaboo</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>INDIANA</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>COLORADO</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NEW YORK</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          one  two  peekaboo  four\n",
       "INDIANA     0    1         2     3\n",
       "COLORADO    4    5         6     7\n",
       "NEW YORK    8    9        10    11"
      ]
     },
     "execution_count": 140,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.rename(index={'OHIO':'INDIANA'},columns={'three':'peekaboo'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "_=data.rename(index={'OHIO':'INDIANA'},inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>INDIANA</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>COLORADO</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NEW YORK</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          one  two  three  four\n",
       "INDIANA     0    1      2     3\n",
       "COLORADO    4    5      6     7\n",
       "NEW YORK    8    9     10    11"
      ]
     },
     "execution_count": 142,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 离散化和面元划分 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(18, 25], (18, 25], (18, 25], (25, 35], (18, 25], ..., (25, 35], (60, 100], (35, 60], (35, 60], (25, 35]]\n",
       "Length: 11\n",
       "Categories (4, object): [(18, 25] < (25, 35] < (35, 60] < (60, 100]]"
      ]
     },
     "execution_count": 144,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ages = [20,22,25,27,23,37,32,61,45,41,32]\n",
    "bins=[18,25,35,60,100]\n",
    "cats=pd.cut(ages,bins);cats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:1: FutureWarning: 'labels' is deprecated. Use 'codes' instead\n",
      "  if __name__ == '__main__':\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, 1, 0, 2, 1, 3, 2, 2, 1], dtype=int8)"
      ]
     },
     "execution_count": 145,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cats.labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, 1, 0, 2, 1, 3, 2, 2, 1], dtype=int8)"
      ]
     },
     "execution_count": 146,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cats.codes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(18, 25]     4\n",
       "(35, 60]     3\n",
       "(25, 35]     3\n",
       "(60, 100]    1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 147,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.value_counts(cats)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 检测和过滤异常值 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "np.random.seed(12345)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data=DataFrame(np.random.randn(1000,4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>-0.067684</td>\n",
       "      <td>0.067924</td>\n",
       "      <td>0.025598</td>\n",
       "      <td>-0.002298</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.998035</td>\n",
       "      <td>0.992106</td>\n",
       "      <td>1.006835</td>\n",
       "      <td>0.996794</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-3.428254</td>\n",
       "      <td>-3.548824</td>\n",
       "      <td>-3.184377</td>\n",
       "      <td>-3.745356</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>-0.774890</td>\n",
       "      <td>-0.591841</td>\n",
       "      <td>-0.641675</td>\n",
       "      <td>-0.644144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>-0.116401</td>\n",
       "      <td>0.101143</td>\n",
       "      <td>0.002073</td>\n",
       "      <td>-0.013611</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>0.616366</td>\n",
       "      <td>0.780282</td>\n",
       "      <td>0.680391</td>\n",
       "      <td>0.654328</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>3.366626</td>\n",
       "      <td>2.653656</td>\n",
       "      <td>3.260383</td>\n",
       "      <td>3.927528</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 0            1            2            3\n",
       "count  1000.000000  1000.000000  1000.000000  1000.000000\n",
       "mean     -0.067684     0.067924     0.025598    -0.002298\n",
       "std       0.998035     0.992106     1.006835     0.996794\n",
       "min      -3.428254    -3.548824    -3.184377    -3.745356\n",
       "25%      -0.774890    -0.591841    -0.641675    -0.644144\n",
       "50%      -0.116401     0.101143     0.002073    -0.013611\n",
       "75%       0.616366     0.780282     0.680391     0.654328\n",
       "max       3.366626     2.653656     3.260383     3.927528"
      ]
     },
     "execution_count": 151,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "col=data[3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "97     3.927528\n",
       "305   -3.399312\n",
       "400   -3.745356\n",
       "Name: 3, dtype: float64"
      ]
     },
     "execution_count": 153,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "col[np.abs(col)>3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>-0.539741</td>\n",
       "      <td>0.476985</td>\n",
       "      <td>3.248944</td>\n",
       "      <td>-1.021228</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>-0.774363</td>\n",
       "      <td>0.552936</td>\n",
       "      <td>0.106061</td>\n",
       "      <td>3.927528</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102</th>\n",
       "      <td>-0.655054</td>\n",
       "      <td>-0.565230</td>\n",
       "      <td>3.176873</td>\n",
       "      <td>0.959533</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>305</th>\n",
       "      <td>-2.315555</td>\n",
       "      <td>0.457246</td>\n",
       "      <td>-0.025907</td>\n",
       "      <td>-3.399312</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>324</th>\n",
       "      <td>0.050188</td>\n",
       "      <td>1.951312</td>\n",
       "      <td>3.260383</td>\n",
       "      <td>0.963301</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>400</th>\n",
       "      <td>0.146326</td>\n",
       "      <td>0.508391</td>\n",
       "      <td>-0.196713</td>\n",
       "      <td>-3.745356</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>499</th>\n",
       "      <td>-0.293333</td>\n",
       "      <td>-0.242459</td>\n",
       "      <td>-3.056990</td>\n",
       "      <td>1.918403</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>523</th>\n",
       "      <td>-3.428254</td>\n",
       "      <td>-0.296336</td>\n",
       "      <td>-0.439938</td>\n",
       "      <td>-0.867165</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>586</th>\n",
       "      <td>0.275144</td>\n",
       "      <td>1.179227</td>\n",
       "      <td>-3.184377</td>\n",
       "      <td>1.369891</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>808</th>\n",
       "      <td>-0.362528</td>\n",
       "      <td>-3.548824</td>\n",
       "      <td>1.553205</td>\n",
       "      <td>-2.186301</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>900</th>\n",
       "      <td>3.366626</td>\n",
       "      <td>-2.372214</td>\n",
       "      <td>0.851010</td>\n",
       "      <td>1.332846</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            0         1         2         3\n",
       "5   -0.539741  0.476985  3.248944 -1.021228\n",
       "97  -0.774363  0.552936  0.106061  3.927528\n",
       "102 -0.655054 -0.565230  3.176873  0.959533\n",
       "305 -2.315555  0.457246 -0.025907 -3.399312\n",
       "324  0.050188  1.951312  3.260383  0.963301\n",
       "400  0.146326  0.508391 -0.196713 -3.745356\n",
       "499 -0.293333 -0.242459 -3.056990  1.918403\n",
       "523 -3.428254 -0.296336 -0.439938 -0.867165\n",
       "586  0.275144  1.179227 -3.184377  1.369891\n",
       "808 -0.362528 -3.548824  1.553205 -2.186301\n",
       "900  3.366626 -2.372214  0.851010  1.332846"
      ]
     },
     "execution_count": 154,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[(np.abs(data)>3).any(1)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    True\n",
       "1    True\n",
       "2    True\n",
       "3    True\n",
       "dtype: bool"
      ]
     },
     "execution_count": 162,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(np.abs(data)>3).any(axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>970</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>971</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>972</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>973</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>974</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>975</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>976</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>977</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>978</th>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>979</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>981</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>982</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>983</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>984</th>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>985</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>986</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>987</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>988</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>989</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>990</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>991</th>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>992</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>993</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>994</th>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>995</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>996</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>997</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>998</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>999</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1000 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       0    1    2    3\n",
       "0   -3.0  3.0 -3.0 -3.0\n",
       "1    3.0  3.0  3.0  3.0\n",
       "2    3.0  3.0  3.0 -3.0\n",
       "3    3.0  3.0  3.0  3.0\n",
       "4   -3.0 -3.0  3.0 -3.0\n",
       "5   -3.0  3.0  3.0 -3.0\n",
       "6   -3.0  3.0  3.0  3.0\n",
       "7    3.0  3.0 -3.0 -3.0\n",
       "8   -3.0 -3.0 -3.0  3.0\n",
       "9   -3.0  3.0 -3.0  3.0\n",
       "10  -3.0 -3.0 -3.0 -3.0\n",
       "11  -3.0  3.0  3.0 -3.0\n",
       "12   3.0  3.0  3.0  3.0\n",
       "13  -3.0  3.0  3.0 -3.0\n",
       "14  -3.0  3.0  3.0 -3.0\n",
       "15   3.0 -3.0  3.0  3.0\n",
       "16   3.0 -3.0 -3.0 -3.0\n",
       "17  -3.0 -3.0 -3.0  3.0\n",
       "18   3.0  3.0  3.0 -3.0\n",
       "19  -3.0 -3.0 -3.0  3.0\n",
       "20   3.0 -3.0  3.0  3.0\n",
       "21   3.0  3.0 -3.0  3.0\n",
       "22  -3.0  3.0  3.0  3.0\n",
       "23  -3.0  3.0  3.0  3.0\n",
       "24   3.0 -3.0  3.0  3.0\n",
       "25  -3.0 -3.0 -3.0 -3.0\n",
       "26  -3.0 -3.0  3.0  3.0\n",
       "27  -3.0  3.0 -3.0  3.0\n",
       "28   3.0 -3.0 -3.0 -3.0\n",
       "29   3.0 -3.0  3.0 -3.0\n",
       "..   ...  ...  ...  ...\n",
       "970  3.0  3.0  3.0 -3.0\n",
       "971 -3.0 -3.0 -3.0  3.0\n",
       "972  3.0  3.0 -3.0 -3.0\n",
       "973 -3.0  3.0  3.0 -3.0\n",
       "974 -3.0 -3.0  3.0 -3.0\n",
       "975 -3.0 -3.0  3.0 -3.0\n",
       "976  3.0  3.0  3.0  3.0\n",
       "977 -3.0  3.0  3.0  3.0\n",
       "978  3.0 -3.0  3.0  3.0\n",
       "979  3.0  3.0 -3.0  3.0\n",
       "980 -3.0 -3.0 -3.0 -3.0\n",
       "981 -3.0  3.0  3.0  3.0\n",
       "982 -3.0  3.0  3.0  3.0\n",
       "983 -3.0  3.0  3.0  3.0\n",
       "984  3.0 -3.0  3.0 -3.0\n",
       "985 -3.0  3.0  3.0  3.0\n",
       "986 -3.0 -3.0  3.0 -3.0\n",
       "987 -3.0 -3.0  3.0 -3.0\n",
       "988 -3.0  3.0 -3.0 -3.0\n",
       "989  3.0  3.0  3.0  3.0\n",
       "990 -3.0 -3.0 -3.0  3.0\n",
       "991  3.0 -3.0  3.0  3.0\n",
       "992 -3.0  3.0 -3.0 -3.0\n",
       "993  3.0  3.0 -3.0  3.0\n",
       "994  3.0 -3.0  3.0  3.0\n",
       "995  3.0  3.0 -3.0  3.0\n",
       "996 -3.0 -3.0 -3.0 -3.0\n",
       "997 -3.0  3.0  3.0  3.0\n",
       "998 -3.0 -3.0 -3.0 -3.0\n",
       "999  3.0  3.0  3.0  3.0\n",
       "\n",
       "[1000 rows x 4 columns]"
      ]
     },
     "execution_count": 164,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sign(data)*3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 169,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df=DataFrame(np.arange(5*4).reshape(5,4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "sampler=np.random.permutation(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 3, 4, 0, 2])"
      ]
     },
     "execution_count": 171,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sampler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>16</td>\n",
       "      <td>17</td>\n",
       "      <td>18</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    0   1   2   3\n",
       "1   4   5   6   7\n",
       "3  12  13  14  15\n",
       "4  16  17  18  19\n",
       "0   0   1   2   3\n",
       "2   8   9  10  11"
      ]
     },
     "execution_count": 172,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.take(sampler)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>16</td>\n",
       "      <td>17</td>\n",
       "      <td>18</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    0   1   2   3\n",
       "1   4   5   6   7\n",
       "3  12  13  14  15\n",
       "0   0   1   2   3\n",
       "2   8   9  10  11\n",
       "4  16  17  18  19"
      ]
     },
     "execution_count": 173,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.take(np.random.permutation(len(df)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "bag=np.array([5,7,-1,6,4])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "sampler = np.random.randint(0,len(bag),size=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 176,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2, 2, 0, 3, 0, 4, 1, 1, 2, 3])"
      ]
     },
     "execution_count": 176,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sampler"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 计量标量 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = DataFrame({'key':['b','b','a','c','a','b'],'data1':range(6)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a  b  c\n",
       "0  0  1  0\n",
       "1  0  1  0\n",
       "2  1  0  0\n",
       "3  0  0  1\n",
       "4  1  0  0\n",
       "5  0  1  0"
      ]
     },
     "execution_count": 178,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.get_dummies(df['key'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 179,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "dummies=pd.get_dummies(df['key'],prefix='ddd')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 180,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df_with_dummy=df[['data1']].join(dummies)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 181,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>key</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>b</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   data1 key\n",
       "0      0   b\n",
       "1      1   b\n",
       "2      2   a\n",
       "3      3   c\n",
       "4      4   a\n",
       "5      5   b"
      ]
     },
     "execution_count": 181,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 182,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    b\n",
       "1    b\n",
       "2    a\n",
       "3    c\n",
       "4    a\n",
       "5    b\n",
       "Name: key, dtype: object"
      ]
     },
     "execution_count": 182,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['key']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 185,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CH1.ipynb            ShapeYourData.ipynb  data1.txt\r\n",
      "CH2.html             TushareWork.ipynb    figpath.png\r\n",
      "\u001b[34mCH2_files\u001b[m\u001b[m/           Untitled.ipynb       figure_1.png\r\n",
      "IPython.ipynb        array_archive.npz    ipython_log.py\r\n",
      "Load files.ipynb     \u001b[34mdata\u001b[m\u001b[m/                meshplot.png\r\n",
      "MovieLens.ipynb      data.csv             mydata.csv\r\n",
      "NewBirth.ipynb       data.txt             \u001b[31mq2.py\u001b[m\u001b[m*\r\n",
      "Pandas.ipynb         data.xlsx            some_array.npy\r\n"
     ]
    }
   ],
   "source": [
    "df_with_dummy\n",
    "%pwd\n",
    "%ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:2: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n",
      "  from ipykernel import kernelapp as app\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movie_id</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Toy Story (1995)</td>\n",
       "      <td>Animation|Children's|Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Jumanji (1995)</td>\n",
       "      <td>Adventure|Children's|Fantasy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Grumpier Old Men (1995)</td>\n",
       "      <td>Comedy|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Waiting to Exhale (1995)</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Father of the Bride Part II (1995)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>Heat (1995)</td>\n",
       "      <td>Action|Crime|Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>Sabrina (1995)</td>\n",
       "      <td>Comedy|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>Tom and Huck (1995)</td>\n",
       "      <td>Adventure|Children's</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>Sudden Death (1995)</td>\n",
       "      <td>Action</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>GoldenEye (1995)</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>11</td>\n",
       "      <td>American President, The (1995)</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>12</td>\n",
       "      <td>Dracula: Dead and Loving It (1995)</td>\n",
       "      <td>Comedy|Horror</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>13</td>\n",
       "      <td>Balto (1995)</td>\n",
       "      <td>Animation|Children's</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>14</td>\n",
       "      <td>Nixon (1995)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>15</td>\n",
       "      <td>Cutthroat Island (1995)</td>\n",
       "      <td>Action|Adventure|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>16</td>\n",
       "      <td>Casino (1995)</td>\n",
       "      <td>Drama|Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>17</td>\n",
       "      <td>Sense and Sensibility (1995)</td>\n",
       "      <td>Drama|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>18</td>\n",
       "      <td>Four Rooms (1995)</td>\n",
       "      <td>Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>19</td>\n",
       "      <td>Ace Ventura: When Nature Calls (1995)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>20</td>\n",
       "      <td>Money Train (1995)</td>\n",
       "      <td>Action</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>21</td>\n",
       "      <td>Get Shorty (1995)</td>\n",
       "      <td>Action|Comedy|Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>22</td>\n",
       "      <td>Copycat (1995)</td>\n",
       "      <td>Crime|Drama|Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>23</td>\n",
       "      <td>Assassins (1995)</td>\n",
       "      <td>Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>24</td>\n",
       "      <td>Powder (1995)</td>\n",
       "      <td>Drama|Sci-Fi</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>25</td>\n",
       "      <td>Leaving Las Vegas (1995)</td>\n",
       "      <td>Drama|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>26</td>\n",
       "      <td>Othello (1995)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>27</td>\n",
       "      <td>Now and Then (1995)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>28</td>\n",
       "      <td>Persuasion (1995)</td>\n",
       "      <td>Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>29</td>\n",
       "      <td>City of Lost Children, The (1995)</td>\n",
       "      <td>Adventure|Sci-Fi</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>30</td>\n",
       "      <td>Shanghai Triad (Yao a yao yao dao waipo qiao) ...</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3853</th>\n",
       "      <td>3923</td>\n",
       "      <td>Return of the Fly (1959)</td>\n",
       "      <td>Horror|Sci-Fi</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3854</th>\n",
       "      <td>3924</td>\n",
       "      <td>Pajama Party (1964)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3855</th>\n",
       "      <td>3925</td>\n",
       "      <td>Stranger Than Paradise (1984)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3856</th>\n",
       "      <td>3926</td>\n",
       "      <td>Voyage to the Bottom of the Sea (1961)</td>\n",
       "      <td>Adventure|Sci-Fi</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3857</th>\n",
       "      <td>3927</td>\n",
       "      <td>Fantastic Voyage (1966)</td>\n",
       "      <td>Adventure|Sci-Fi</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3858</th>\n",
       "      <td>3928</td>\n",
       "      <td>Abbott and Costello Meet Frankenstein (1948)</td>\n",
       "      <td>Comedy|Horror</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3859</th>\n",
       "      <td>3929</td>\n",
       "      <td>Bank Dick, The (1940)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3860</th>\n",
       "      <td>3930</td>\n",
       "      <td>Creature From the Black Lagoon, The (1954)</td>\n",
       "      <td>Horror</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3861</th>\n",
       "      <td>3931</td>\n",
       "      <td>Giant Gila Monster, The (1959)</td>\n",
       "      <td>Horror|Sci-Fi</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3862</th>\n",
       "      <td>3932</td>\n",
       "      <td>Invisible Man, The (1933)</td>\n",
       "      <td>Horror|Sci-Fi</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3863</th>\n",
       "      <td>3933</td>\n",
       "      <td>Killer Shrews, The (1959)</td>\n",
       "      <td>Horror|Sci-Fi</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3864</th>\n",
       "      <td>3934</td>\n",
       "      <td>Kronos (1957)</td>\n",
       "      <td>Sci-Fi</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3865</th>\n",
       "      <td>3935</td>\n",
       "      <td>Kronos (1973)</td>\n",
       "      <td>Horror</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3866</th>\n",
       "      <td>3936</td>\n",
       "      <td>Phantom of the Opera, The (1943)</td>\n",
       "      <td>Drama|Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3867</th>\n",
       "      <td>3937</td>\n",
       "      <td>Runaway (1984)</td>\n",
       "      <td>Sci-Fi|Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3868</th>\n",
       "      <td>3938</td>\n",
       "      <td>Slumber Party Massacre, The (1982)</td>\n",
       "      <td>Horror</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3869</th>\n",
       "      <td>3939</td>\n",
       "      <td>Slumber Party Massacre II, The (1987)</td>\n",
       "      <td>Horror</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3870</th>\n",
       "      <td>3940</td>\n",
       "      <td>Slumber Party Massacre III, The (1990)</td>\n",
       "      <td>Horror</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3871</th>\n",
       "      <td>3941</td>\n",
       "      <td>Sorority House Massacre (1986)</td>\n",
       "      <td>Horror</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3872</th>\n",
       "      <td>3942</td>\n",
       "      <td>Sorority House Massacre II (1990)</td>\n",
       "      <td>Horror</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3873</th>\n",
       "      <td>3943</td>\n",
       "      <td>Bamboozled (2000)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3874</th>\n",
       "      <td>3944</td>\n",
       "      <td>Bootmen (2000)</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3875</th>\n",
       "      <td>3945</td>\n",
       "      <td>Digimon: The Movie (2000)</td>\n",
       "      <td>Adventure|Animation|Children's</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3876</th>\n",
       "      <td>3946</td>\n",
       "      <td>Get Carter (2000)</td>\n",
       "      <td>Action|Drama|Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3877</th>\n",
       "      <td>3947</td>\n",
       "      <td>Get Carter (1971)</td>\n",
       "      <td>Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3878</th>\n",
       "      <td>3948</td>\n",
       "      <td>Meet the Parents (2000)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3879</th>\n",
       "      <td>3949</td>\n",
       "      <td>Requiem for a Dream (2000)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3880</th>\n",
       "      <td>3950</td>\n",
       "      <td>Tigerland (2000)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3881</th>\n",
       "      <td>3951</td>\n",
       "      <td>Two Family House (2000)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3882</th>\n",
       "      <td>3952</td>\n",
       "      <td>Contender, The (2000)</td>\n",
       "      <td>Drama|Thriller</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3883 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      movie_id                                              title  \\\n",
       "0            1                                   Toy Story (1995)   \n",
       "1            2                                     Jumanji (1995)   \n",
       "2            3                            Grumpier Old Men (1995)   \n",
       "3            4                           Waiting to Exhale (1995)   \n",
       "4            5                 Father of the Bride Part II (1995)   \n",
       "5            6                                        Heat (1995)   \n",
       "6            7                                     Sabrina (1995)   \n",
       "7            8                                Tom and Huck (1995)   \n",
       "8            9                                Sudden Death (1995)   \n",
       "9           10                                   GoldenEye (1995)   \n",
       "10          11                     American President, The (1995)   \n",
       "11          12                 Dracula: Dead and Loving It (1995)   \n",
       "12          13                                       Balto (1995)   \n",
       "13          14                                       Nixon (1995)   \n",
       "14          15                            Cutthroat Island (1995)   \n",
       "15          16                                      Casino (1995)   \n",
       "16          17                       Sense and Sensibility (1995)   \n",
       "17          18                                  Four Rooms (1995)   \n",
       "18          19              Ace Ventura: When Nature Calls (1995)   \n",
       "19          20                                 Money Train (1995)   \n",
       "20          21                                  Get Shorty (1995)   \n",
       "21          22                                     Copycat (1995)   \n",
       "22          23                                   Assassins (1995)   \n",
       "23          24                                      Powder (1995)   \n",
       "24          25                           Leaving Las Vegas (1995)   \n",
       "25          26                                     Othello (1995)   \n",
       "26          27                                Now and Then (1995)   \n",
       "27          28                                  Persuasion (1995)   \n",
       "28          29                  City of Lost Children, The (1995)   \n",
       "29          30  Shanghai Triad (Yao a yao yao dao waipo qiao) ...   \n",
       "...        ...                                                ...   \n",
       "3853      3923                           Return of the Fly (1959)   \n",
       "3854      3924                                Pajama Party (1964)   \n",
       "3855      3925                      Stranger Than Paradise (1984)   \n",
       "3856      3926             Voyage to the Bottom of the Sea (1961)   \n",
       "3857      3927                            Fantastic Voyage (1966)   \n",
       "3858      3928       Abbott and Costello Meet Frankenstein (1948)   \n",
       "3859      3929                              Bank Dick, The (1940)   \n",
       "3860      3930         Creature From the Black Lagoon, The (1954)   \n",
       "3861      3931                     Giant Gila Monster, The (1959)   \n",
       "3862      3932                          Invisible Man, The (1933)   \n",
       "3863      3933                          Killer Shrews, The (1959)   \n",
       "3864      3934                                      Kronos (1957)   \n",
       "3865      3935                                      Kronos (1973)   \n",
       "3866      3936                   Phantom of the Opera, The (1943)   \n",
       "3867      3937                                     Runaway (1984)   \n",
       "3868      3938                 Slumber Party Massacre, The (1982)   \n",
       "3869      3939              Slumber Party Massacre II, The (1987)   \n",
       "3870      3940             Slumber Party Massacre III, The (1990)   \n",
       "3871      3941                     Sorority House Massacre (1986)   \n",
       "3872      3942                  Sorority House Massacre II (1990)   \n",
       "3873      3943                                  Bamboozled (2000)   \n",
       "3874      3944                                     Bootmen (2000)   \n",
       "3875      3945                          Digimon: The Movie (2000)   \n",
       "3876      3946                                  Get Carter (2000)   \n",
       "3877      3947                                  Get Carter (1971)   \n",
       "3878      3948                            Meet the Parents (2000)   \n",
       "3879      3949                         Requiem for a Dream (2000)   \n",
       "3880      3950                                   Tigerland (2000)   \n",
       "3881      3951                            Two Family House (2000)   \n",
       "3882      3952                              Contender, The (2000)   \n",
       "\n",
       "                              genres  \n",
       "0        Animation|Children's|Comedy  \n",
       "1       Adventure|Children's|Fantasy  \n",
       "2                     Comedy|Romance  \n",
       "3                       Comedy|Drama  \n",
       "4                             Comedy  \n",
       "5              Action|Crime|Thriller  \n",
       "6                     Comedy|Romance  \n",
       "7               Adventure|Children's  \n",
       "8                             Action  \n",
       "9          Action|Adventure|Thriller  \n",
       "10              Comedy|Drama|Romance  \n",
       "11                     Comedy|Horror  \n",
       "12              Animation|Children's  \n",
       "13                             Drama  \n",
       "14          Action|Adventure|Romance  \n",
       "15                    Drama|Thriller  \n",
       "16                     Drama|Romance  \n",
       "17                          Thriller  \n",
       "18                            Comedy  \n",
       "19                            Action  \n",
       "20               Action|Comedy|Drama  \n",
       "21              Crime|Drama|Thriller  \n",
       "22                          Thriller  \n",
       "23                      Drama|Sci-Fi  \n",
       "24                     Drama|Romance  \n",
       "25                             Drama  \n",
       "26                             Drama  \n",
       "27                           Romance  \n",
       "28                  Adventure|Sci-Fi  \n",
       "29                             Drama  \n",
       "...                              ...  \n",
       "3853                   Horror|Sci-Fi  \n",
       "3854                          Comedy  \n",
       "3855                          Comedy  \n",
       "3856                Adventure|Sci-Fi  \n",
       "3857                Adventure|Sci-Fi  \n",
       "3858                   Comedy|Horror  \n",
       "3859                          Comedy  \n",
       "3860                          Horror  \n",
       "3861                   Horror|Sci-Fi  \n",
       "3862                   Horror|Sci-Fi  \n",
       "3863                   Horror|Sci-Fi  \n",
       "3864                          Sci-Fi  \n",
       "3865                          Horror  \n",
       "3866                  Drama|Thriller  \n",
       "3867                 Sci-Fi|Thriller  \n",
       "3868                          Horror  \n",
       "3869                          Horror  \n",
       "3870                          Horror  \n",
       "3871                          Horror  \n",
       "3872                          Horror  \n",
       "3873                          Comedy  \n",
       "3874                    Comedy|Drama  \n",
       "3875  Adventure|Animation|Children's  \n",
       "3876           Action|Drama|Thriller  \n",
       "3877                        Thriller  \n",
       "3878                          Comedy  \n",
       "3879                           Drama  \n",
       "3880                           Drama  \n",
       "3881                           Drama  \n",
       "3882                  Drama|Thriller  \n",
       "\n",
       "[3883 rows x 3 columns]"
      ]
     },
     "execution_count": 189,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mnames = ['movie_id','title','genres']\n",
    "movies = pd.read_table('./data/pydata-book/ch02/movielens/movies.dat',sep='::',header=None,names=mnames);movies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 228,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "gernes_iter=(set(x.split('|')) for x in movies.genres);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 229,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "genres = sorted(set.union(*gernes_iter))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 234,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Action</th>\n",
       "      <th>Adventure</th>\n",
       "      <th>Animation</th>\n",
       "      <th>Children's</th>\n",
       "      <th>Comedy</th>\n",
       "      <th>Crime</th>\n",
       "      <th>Documentary</th>\n",
       "      <th>Drama</th>\n",
       "      <th>Fantasy</th>\n",
       "      <th>Film-Noir</th>\n",
       "      <th>Horror</th>\n",
       "      <th>Musical</th>\n",
       "      <th>Mystery</th>\n",
       "      <th>Romance</th>\n",
       "      <th>Sci-Fi</th>\n",
       "      <th>Thriller</th>\n",
       "      <th>War</th>\n",
       "      <th>Western</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3853</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3854</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3855</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3856</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3857</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3858</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3859</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3860</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3861</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3862</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3863</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3864</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3865</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3866</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3867</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3868</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3869</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3870</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3871</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3872</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3873</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3874</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3875</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3876</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3877</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3878</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3879</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3880</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3881</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3882</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3883 rows × 18 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      Action  Adventure  Animation  Children's  Comedy  Crime  Documentary  \\\n",
       "0        0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "1        0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "2        0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3        0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "4        0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "5        0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "6        0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "7        0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "8        0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "9        0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "10       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "11       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "12       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "13       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "14       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "15       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "16       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "17       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "18       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "19       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "20       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "21       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "22       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "23       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "24       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "25       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "26       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "27       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "28       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "29       0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "...      ...        ...        ...         ...     ...    ...          ...   \n",
       "3853     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3854     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3855     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3856     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3857     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3858     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3859     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3860     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3861     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3862     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3863     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3864     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3865     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3866     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3867     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3868     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3869     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3870     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3871     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3872     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3873     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3874     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3875     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3876     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3877     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3878     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3879     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3880     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3881     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "3882     0.0        0.0        0.0         0.0     0.0    0.0          0.0   \n",
       "\n",
       "      Drama  Fantasy  Film-Noir  Horror  Musical  Mystery  Romance  Sci-Fi  \\\n",
       "0       0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "1       0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "2       0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3       0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "4       0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "5       0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "6       0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "7       0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "8       0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "9       0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "10      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "11      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "12      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "13      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "14      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "15      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "16      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "17      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "18      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "19      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "20      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "21      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "22      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "23      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "24      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "25      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "26      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "27      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "28      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "29      0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "...     ...      ...        ...     ...      ...      ...      ...     ...   \n",
       "3853    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3854    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3855    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3856    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3857    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3858    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3859    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3860    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3861    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3862    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3863    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3864    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3865    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3866    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3867    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3868    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3869    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3870    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3871    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3872    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3873    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3874    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3875    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3876    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3877    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3878    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3879    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3880    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3881    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "3882    0.0      0.0        0.0     0.0      0.0      0.0      0.0     0.0   \n",
       "\n",
       "      Thriller  War  Western  \n",
       "0          0.0  0.0      0.0  \n",
       "1          0.0  0.0      0.0  \n",
       "2          0.0  0.0      0.0  \n",
       "3          0.0  0.0      0.0  \n",
       "4          0.0  0.0      0.0  \n",
       "5          0.0  0.0      0.0  \n",
       "6          0.0  0.0      0.0  \n",
       "7          0.0  0.0      0.0  \n",
       "8          0.0  0.0      0.0  \n",
       "9          0.0  0.0      0.0  \n",
       "10         0.0  0.0      0.0  \n",
       "11         0.0  0.0      0.0  \n",
       "12         0.0  0.0      0.0  \n",
       "13         0.0  0.0      0.0  \n",
       "14         0.0  0.0      0.0  \n",
       "15         0.0  0.0      0.0  \n",
       "16         0.0  0.0      0.0  \n",
       "17         0.0  0.0      0.0  \n",
       "18         0.0  0.0      0.0  \n",
       "19         0.0  0.0      0.0  \n",
       "20         0.0  0.0      0.0  \n",
       "21         0.0  0.0      0.0  \n",
       "22         0.0  0.0      0.0  \n",
       "23         0.0  0.0      0.0  \n",
       "24         0.0  0.0      0.0  \n",
       "25         0.0  0.0      0.0  \n",
       "26         0.0  0.0      0.0  \n",
       "27         0.0  0.0      0.0  \n",
       "28         0.0  0.0      0.0  \n",
       "29         0.0  0.0      0.0  \n",
       "...        ...  ...      ...  \n",
       "3853       0.0  0.0      0.0  \n",
       "3854       0.0  0.0      0.0  \n",
       "3855       0.0  0.0      0.0  \n",
       "3856       0.0  0.0      0.0  \n",
       "3857       0.0  0.0      0.0  \n",
       "3858       0.0  0.0      0.0  \n",
       "3859       0.0  0.0      0.0  \n",
       "3860       0.0  0.0      0.0  \n",
       "3861       0.0  0.0      0.0  \n",
       "3862       0.0  0.0      0.0  \n",
       "3863       0.0  0.0      0.0  \n",
       "3864       0.0  0.0      0.0  \n",
       "3865       0.0  0.0      0.0  \n",
       "3866       0.0  0.0      0.0  \n",
       "3867       0.0  0.0      0.0  \n",
       "3868       0.0  0.0      0.0  \n",
       "3869       0.0  0.0      0.0  \n",
       "3870       0.0  0.0      0.0  \n",
       "3871       0.0  0.0      0.0  \n",
       "3872       0.0  0.0      0.0  \n",
       "3873       0.0  0.0      0.0  \n",
       "3874       0.0  0.0      0.0  \n",
       "3875       0.0  0.0      0.0  \n",
       "3876       0.0  0.0      0.0  \n",
       "3877       0.0  0.0      0.0  \n",
       "3878       0.0  0.0      0.0  \n",
       "3879       0.0  0.0      0.0  \n",
       "3880       0.0  0.0      0.0  \n",
       "3881       0.0  0.0      0.0  \n",
       "3882       0.0  0.0      0.0  \n",
       "\n",
       "[3883 rows x 18 columns]"
      ]
     },
     "execution_count": 234,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ge_df = DataFrame(np.zeros((len(movies),len(genres))),columns=genres);ge_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 239,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "movies_ge = pd.concat([movies,ge_df],axis=1,join='outer')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 240,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([u'movie_id', u'title', u'genres', u'Action', u'Adventure', u'Animation',\n",
       "       u'Children's', u'Comedy', u'Crime', u'Documentary', u'Drama',\n",
       "       u'Fantasy', u'Film-Noir', u'Horror', u'Musical', u'Mystery', u'Romance',\n",
       "       u'Sci-Fi', u'Thriller', u'War', u'Western'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 240,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies_ge.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 248,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "for i,gen in enumerate(movies_ge.genres):\n",
    "    movies_ge.ix[i,gen.split('|')]=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 249,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
